#!/usr/bin/env python
# coding: utf-8

# In[1]:


# Install PlaidML's Keras backend into the running kernel's environment.
# A bare `pip install ...` line is notebook automagic and is a SyntaxError in
# a .py file, so the %pip magic is invoked explicitly instead.
get_ipython().run_line_magic('pip', 'install plaidml-keras')


# In[2]:


# Install hyperopt, which provides the fmin/tpe/hp names imported further down.
get_ipython().run_line_magic('pip', 'install hyperopt')


# In[3]:


import plaidml
import plaidml.keras
plaidml.keras.install_backend()


# In[2]:


import pandas as pd
pd.plotting.register_matplotlib_converters()
import matplotlib.pyplot as plt
get_ipython().run_line_magic('matplotlib', 'inline')
import seaborn as sns
import matplotlib.dates as mdates
import numpy as np # linear algebra
import math   
from datetime import datetime, date 

import warnings
warnings.filterwarnings('ignore')

from sklearn.model_selection import train_test_split

try:
    from bayes_opt import BayesianOptimization
except ImportError:
    # The package is missing: install it, then retry the import so that
    # BayesianOptimization is actually bound afterwards (previously the name
    # stayed undefined after the install).
    import importlib
    get_ipython().system('pip install bayesian-optimization')
    importlib.invalidate_caches()  # let the import system see the new package
    from bayes_opt import BayesianOptimization
    
import hyperopt
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials

from sklearn.preprocessing import MinMaxScaler

from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.metrics import r2_score

try:
    import keras
except ImportError:
    # The package is missing: install it, then retry the import so the module
    # is really available to the `from keras...` imports that follow.
    import importlib
    get_ipython().system('pip install keras')
    importlib.invalidate_caches()  # let the import system see the new package
    import keras
from tensorflow import keras
from tensorflow.keras import layers
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import SimpleRNN, LSTM, GRU
from keras.layers import Input, Dense, Activation, Flatten, Dropout

print("Setup Complete")


# In[3]:


# Load the raw dam records; the CSV is decoded with the CP949 codec.
dam = pd.read_csv("juam_control_dam.csv", encoding='cp949')
dam  # notebook display leftover: evaluated but not shown when run as a script


# In[4]:


dam.describe()


# In[8]:


# Rename the five columns to the names used by the rest of the script.
dam.columns = ['date', 'water_level', 'rainfall', 'inflow', 'outflow']
dam.head()


# In[9]:


# Force both flow columns to numeric; entries that cannot be parsed become NaN.
for flow_col in ('inflow', 'outflow'):
    dam[flow_col] = pd.to_numeric(dam[flow_col], errors='coerce')


# In[10]:


dam.describe()


# In[11]:


# Discard every row that contains a NaN (including the coerced ones above).
dam = dam.dropna(axis=0)


# In[14]:


# Keep only rows with a positive water level and non-negative rainfall,
# inflow and outflow.
valid_rows = (
    (dam['water_level'] > 0)
    & (dam['rainfall'] >= 0)
    & (dam['inflow'] >= 0)
    & (dam['outflow'] >= 0)
)
dam = dam[valid_rows]


# In[15]:


dam.describe()


# In[16]:


# Renumber the surviving rows 0..n-1.
dam = dam.reset_index(drop=True)


# In[17]:


# Visual check of the outflow distribution before outliers are removed.
plt.figure(figsize=(25,6))
sns.boxplot(data=dam['outflow'], color='red')
plt.show()


# In[16]:


# Tukey fences: keep rows whose outflow lies within 1.5 * IQR of the quartiles.
Q1 = dam['outflow'].quantile(0.25)
Q3 = dam['outflow'].quantile(0.75)
IQR = Q3 - Q1    # interquartile range

# Summary BEFORE filtering (previously the post-filter summary was printed
# twice, so no before/after comparison was possible).
print(dam['outflow'].describe())
print('\n')

# Named mask instead of `filter`, which shadowed the builtin of that name.
outflow_in_range = (dam['outflow'] >= Q1 - 1.5 * IQR) & (dam['outflow'] <= Q3 + 1.5 * IQR)
dam = dam.loc[outflow_in_range]

# Summary AFTER filtering.
print(dam['outflow'].describe())


# In[19]:


# Renumber the surviving rows 0..n-1.
dam = dam.reset_index(drop=True)


# In[20]:


# The four measurement columns that are min-max scaled below.
features = dam[['water_level', 'rainfall', 'inflow', 'outflow']]
print(features)


# In[21]:


from sklearn.preprocessing import MinMaxScaler

# Fit the scaler on the measurement columns and transform them in one step,
# then wrap the resulting array in a DataFrame with the same column names.
scaler = MinMaxScaler()
scaled_values = scaler.fit_transform(features)
dam_scaled = pd.DataFrame(data=scaled_values, columns=list(features.columns))

# Print the per-column minimum and maximum of the scaled data.
print('feature 최솟값')
print(dam_scaled.min())
print('\nfearure 최댓값')
print(dam_scaled.max())


# In[22]:


# Net scaled balance per row: rainfall + inflow - outflow, all taken from the
# min-max scaled frame. dam and dam_scaled hold the same rows in the same
# order, so the result is assigned positionally (to_numpy) in one vectorized
# step. The previous version wrote each row through .loc in a Python loop,
# which is very slow on large frames and started from an integer column that
# then had to be upcast to float.
dam['variation'] = (
    dam_scaled['rainfall'] + dam_scaled['inflow'] - dam_scaled['outflow']
).to_numpy()


# In[23]:


df = dam['variation']


# In[24]:


# First difference of the variation series:
#     ds_dt[k] = variation[k + 1] - variation[k]
# Series.diff() replaces the element-by-element Python loop. Its leading NaN
# (the first row has no predecessor) is dropped, so ds_dt has len(df) - 1 rows
# numbered from 0, exactly like the frame built from the old loop.
ds_dt = df.diff().iloc[1:].reset_index(drop=True).to_frame(name='ds_dt')
print(ds_dt)


# In[27]:


# Drop the first row of dam (it has no difference value) and renumber the
# rest, so that dam and ds_dt line up row for row.
dam = dam[1:].reset_index(drop=True)
print(dam)


# In[29]:


# Attach the differences as a new column (aligned on the 0..n-1 index).
dam['ds_dt'] = ds_dt['ds_dt']
print(dam)


# In[31]:


# Parse the date column into real timestamps.
dam["date"] = pd.to_datetime(dam["date"])


# In[32]:


# Use the timestamps as the row index while keeping the 'date' column itself.
dam = dam.set_index("date", drop=False)


# In[33]:


# Columns carried forward into the modelling frame: the four measurements plus
# the first difference of the variation series.
model_columns = ['water_level', 'rainfall', 'inflow', 'outflow', 'ds_dt']
features = dam[model_columns]
print(features)


# In[34]:


def non_feature_engineering(raw):
    """Return a copy of ``raw`` re-sampled onto a regular hourly time index.

    Steps:
      1. Work on a copy, so the caller's frame is never modified.
      2. If a ``'date'`` column exists, parse it and store the timestamps in
         two extra columns, ``'datetime'`` and ``'DateTime'``.
      3. If the index has dtype ``int64`` (e.g. a default RangeIndex), make
         ``'DateTime'`` the index.
      4. Conform the frame to an hourly frequency; hours missing from the
         input are filled with the last preceding row (forward fill).

    Parameters
    ----------
    raw : pandas.DataFrame
        Either indexed by timestamps already, or integer-indexed with a
        ``'date'`` column that ``pd.to_datetime`` can parse.

    Returns
    -------
    pandas.DataFrame
        The hourly frame.

    Raises
    ------
    KeyError
        If the index is ``int64`` and there is no ``'date'`` column, because
        the ``'DateTime'`` column to index by does not exist.
    """
    raw_nfe = raw.copy()
    if 'date' in raw_nfe.columns:
        parsed = pd.to_datetime(raw_nfe['date'])
        raw_nfe['datetime'] = parsed
        # Already parsed: no need to run to_datetime a second time.
        raw_nfe['DateTime'] = parsed
    if raw_nfe.index.dtype == 'int64':
        raw_nfe.set_index('DateTime', inplace=True)
    # An Hour offset object is used instead of the 'H' string alias: the alias
    # is deprecated since pandas 2.2 (in favour of 'h', which older versions
    # reject), whereas the offset object works on every pandas version.
    raw_nfe = raw_nfe.asfreq(pd.offsets.Hour(), method='ffill')
    return raw_nfe
# raw_rd = non_feature_engineering(raw_all)


# Build the hourly frame that the model is trained on.
raw_nfe = non_feature_engineering(features)
raw_nfe  # notebook display leftover: evaluated but not shown when run as a script


# In[35]:


dam.isnull().sum()


# In[36]:


# Print the number of missing values in each raw measurement column.
for measurement in ('water_level', 'rainfall', 'inflow', 'outflow'):
    print(dam[measurement].isnull().sum())


# In[37]:


raw_nfe.isnull().sum()


# In[38]:


raw_nfe.describe()


# In[39]:


# NOTE(review): 171568 is presumably the row count of raw_nfe and 0.8 a
# train fraction - confirm; the result is not stored anywhere.
171568*0.8


# In[40]:


# First 137254 rows (the integer part of the product above); display only.
raw_nfe[:137254]


# In[40]:


def datasplit_ts(raw, Y_colname, X_colname, criteria):
    """Split an ordered frame into train/test parts at an index threshold.

    Rows whose index compares strictly below ``criteria`` form the training
    part; rows whose index is greater than or equal to it form the test part.
    The shapes of all pieces are printed.

    Parameters
    ----------
    raw : pandas.DataFrame
        Frame to split; its index is compared against ``criteria``.
    Y_colname : column selector for the target(s).
    X_colname : column selector for the inputs.
    criteria : value comparable with ``raw.index`` (e.g. a date string for a
        DatetimeIndex).

    Returns
    -------
    tuple
        ``(X_train, X_test, Y_train, Y_test)``.
    """
    before_cut = raw.index < criteria
    from_cut = raw.index >= criteria
    train_part = raw.loc[before_cut, :]
    test_part = raw.loc[from_cut, :]

    Y_train, X_train = train_part[Y_colname], train_part[X_colname]
    Y_test, X_test = test_part[Y_colname], test_part[X_colname]

    print('Train_size:', train_part.shape, 'Test_size:', test_part.shape)
    print('X_train:', X_train.shape, 'Y_train:', Y_train.shape)
    print('X_test:', X_test.shape, 'Y_test:', Y_test.shape)
    return X_train, X_test, Y_train, Y_test


# In[41]:


# Search space handed to hyperopt's fmin. Each entry is a quantized uniform
# draw, hp.quniform(label, low, high, q); hyperparameter_tuning casts the
# sampled values to int before using them.
space = dict(
    batch_size=hp.quniform("batch_size", 64, 257, 64),
    sequence=hp.quniform('sequence', 24, 169, 24),
    # dropout_ratio=hp.quniform('dropout_ratio', 0, 0.6, 0.1),
    epoch=hp.quniform('epoch', 10, 51, 10),
)


# In[42]:


def _make_windows(x_scaled, y_scaled, sequence):
    """Cut scaled arrays into (input window, next-step target) pairs.

    Window ``i`` holds rows ``i .. i + sequence - 1`` of ``x_scaled``; its
    target is row ``i + sequence`` of ``y_scaled``, flattened.

    Returns a tuple ``(x_windows, y_targets)`` of NumPy arrays with
    ``len(x_scaled) - sequence`` entries each.

    Raises ValueError when there are not more rows than ``sequence``, since
    no window/target pair can be formed then.
    """
    n_windows = len(x_scaled) - sequence
    if n_windows <= 0:
        raise ValueError(
            'need more than %d rows to build windows, got %d'
            % (sequence, len(x_scaled))
        )
    x_windows = np.array([x_scaled[i: i + sequence] for i in range(n_windows)])
    y_targets = np.array(
        [np.ravel(y_scaled[i + sequence: i + sequence + 1]) for i in range(n_windows)]
    )
    return x_windows, y_targets


def hyperparameter_tuning(space):
    """Hyperopt objective: train a two-layer GRU and report its test MSE.

    Reads the module-level ``features`` (for column names) and ``raw_nfe``
    (the data), splits the data at '2017-08-28' with ``datasplit_ts``, min-max
    scales inputs and target using statistics fitted on the training part
    only, builds sliding windows of length ``sequence`` and fits the network.

    Parameters
    ----------
    space : dict
        One sampled point with keys ``'sequence'`` (window length),
        ``'batch_size'`` and ``'epoch'``; each value is cast to int.

    Returns
    -------
    dict
        ``'loss'``: mean squared error on the test windows (computed on the
        scaled target), ``'status'``: ``STATUS_OK``, ``'model'``: the fitted
        model, ``'r2'``: R^2 score on the same predictions.
    """
    sequence = int(space['sequence'])
    batch_size = int(space['batch_size'])
    epochs = int(space['epoch'])

    # Target column, and columns that must never be used as inputs.
    Y_colname = ['water_level']
    X_remove = ['date', 'datetime', 'variation']
    X_colname = [x for x in features.columns if x not in Y_colname + X_remove]
    X_train, X_test, Y_train, Y_test = datasplit_ts(raw_nfe, Y_colname, X_colname, '2017-08-28')

    # Fit the scalers on the training part only and reuse them for the test
    # part, so no test statistics leak into the scaling.
    scaler_X_tr = MinMaxScaler()
    scaler_Y_tr = MinMaxScaler()
    X_train_scaled = scaler_X_tr.fit_transform(X_train)
    Y_train_scaled = scaler_Y_tr.fit_transform(Y_train)
    X_test_scaled = scaler_X_tr.transform(X_test)
    Y_test_scaled = scaler_Y_tr.transform(Y_test)

    # Windows come out as (n_windows, sequence, n_features) already, which is
    # the layout the recurrent layers below are given via input_shape.
    X_train, Y_train = _make_windows(X_train_scaled, Y_train_scaled, sequence)
    X_test, Y_test = _make_windows(X_test_scaled, Y_test_scaled, sequence)

    # GRU
    model = Sequential()
    model.add(GRU(128, input_shape=(X_train.shape[1], X_train.shape[2]), return_sequences=True, activation='relu'))
    #model.add(Dropout(space['dropout_ratio']))
    model.add(GRU(64, return_sequences=False, activation="relu"))
    #model.add(Dropout(space['dropout_ratio']))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mean_squared_error')
    model.fit(X_train, Y_train, batch_size=batch_size, epochs=epochs, verbose=0)

    pred = model.predict(X_test)
    mse = mean_squared_error(Y_test, pred)
    r2 = r2_score(Y_test, pred)

    # 'loss' is what fmin minimizes; r2 is returned as extra information
    # instead of being computed and thrown away.
    return {'loss': mse, 'status': STATUS_OK, 'model': model, 'r2': r2}
        
    


# In[43]:


import torch

# Pick the first CUDA GPU when PyTorch reports one, otherwise the CPU.
# NOTE(review): `device` is not referenced by any active code in the visible
# part of this file - confirm whether it is still needed.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")


# In[44]:


trials = Trials()

# max_evals sets how many trials are run; more trials generally lead to a
# better result, at the cost of one full model training per trial.
best = fmin(fn=hyperparameter_tuning,
            space=space,
            algo=tpe.suggest,
            max_evals=3,
            trials=trials)

# The quantized parameters come back as floats; cast them to int, which is
# how hyperparameter_tuning consumes them. (The previous self-assignments
# `best[k] = best[k]` had no effect.)
for param in ('batch_size', 'sequence', 'epoch'):
    best[param] = int(best[param])
#best['dropout_ratio'] = best['dropout_ratio']
print(best)


# In[ ]:




